library(Matrix)
library(Matrix.utils)
library(tidyverse)
## ── Attaching core tidyverse packages ─────────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.1 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ───────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ readr::col_factor() masks scales::col_factor()
## ✖ purrr::discard() masks scales::discard()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(Seurat)
## Attaching SeuratObject
library(dplyr)
library(knitr)
library(ggsci)
library(RCurl)
## Warning: package 'RCurl' was built under R version 4.2.3
##
## Attaching package: 'RCurl'
##
## The following object is masked from 'package:tidyr':
##
## complete
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(SingleR)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
##
## The following object is masked from 'package:dplyr':
##
## count
##
##
## Attaching package: 'MatrixGenerics'
##
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
##
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
##
## The following objects are masked from 'package:lubridate':
##
## intersect, setdiff, union
##
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
##
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
##
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
##
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
##
## The following objects are masked from 'package:lubridate':
##
## second, second<-
##
## The following objects are masked from 'package:dplyr':
##
## first, rename
##
## The following object is masked from 'package:tidyr':
##
## expand
##
## The following objects are masked from 'package:Matrix':
##
## expand, unname
##
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
##
## The following object is masked from 'package:lubridate':
##
## %within%
##
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## The following object is masked from 'package:purrr':
##
## reduce
##
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
##
## Attaching package: 'Biobase'
##
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
##
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
##
##
## Attaching package: 'SummarizedExperiment'
##
## The following object is masked from 'package:SeuratObject':
##
## Assays
##
## The following object is masked from 'package:Seurat':
##
## Assays
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(multtest)
library(clustree)
## Loading required package: ggraph
library(limma)
##
## Attaching package: 'limma'
##
## The following object is masked from 'package:BiocGenerics':
##
## plotMA
#Quality control metrices
load("~/work/PI3K/data/SeuratObjects/merged/Human/merged_seurat.RData")
merged_filtered_seurat <- subset(merged_seurat, subset = Sample_Tag != 'NA' & Sample_Tag != 'Undetermined' & Sample_Tag != 'SampleTag01_hs' & Sample_Tag != 'Multiplet')
merged_filtered_seurat$nCount_RNA <- NULL
merged_filtered_seurat$nFeature_RNA <- NULL
merged_filtered_seurat_meta <- merged_filtered_seurat@meta.data
subset_control <- subset(merged_filtered_seurat_meta, Sample_Tag == "Control" | Sample_Tag == "Control_tagged")
subset_alpelisib <- subset(merged_filtered_seurat_meta, Sample_Tag == "Alpelisib")
subset_copanlisib <- subset(merged_filtered_seurat_meta, Sample_Tag == "Copanlisib")
#cell count
subset_control %>%
ggplot(aes(x=Xenograft, fill=Xenograft)) +
geom_bar() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells from the untreated xenografts")+
scale_fill_lancet()
The figures are showing different quality metrices for the control samples.
# Extract identity and sample information from dataframe to determine the number of cells per sample_tag
n_cells_control<- table(subset_control$Sample_Tag)
# View table
kable(n_cells_control,
col.names = c("Sample_Tag",
"nCells"),
caption = "The table shows the number of cells which were untreated.")
| Sample_Tag | nCells |
|---|---|
| Control | 16260 |
#UMI count
subset_control %>%
ggplot(aes(color=Xenograft, x=nUMI, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
ylab("Cell density") +
geom_vline(xintercept = 500)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("nUMi from the control xenografts")+
scale_fill_lancet()
The figures are showing different quality metrices for the control samples.
#Gene Count
subset_control %>%
ggplot(aes(color=Xenograft, x=nGene, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
geom_vline(xintercept = 300)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("nGene from the control xenografts")+
scale_fill_lancet()
The figures are showing different quality metrices for the control samples.
#joint filtering effect
subset_control %>%
ggplot(aes(x=nUMI, y=nGene)) +
geom_point() +
scale_colour_gradient(low = "gray90", high = "black") +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
theme_classic() +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 250) +
facet_wrap(~Sample_Tag)+
scale_fill_lancet()
## `geom_smooth()` using formula = 'y ~ x'
The figures are showing different quality metrices for the control samples.
#Mitochondrial count
subset_control %>%
ggplot(aes(color=Xenograft, x=percent.mt, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
geom_vline(xintercept = 2)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("Mitochondrial gene expression in [%] \n from the control xenografts")+
scale_fill_lancet()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 22 rows containing non-finite values (`stat_density()`).
The figures are showing different quality metrices for the control samples.
#cell count
subset_copanlisib %>%
ggplot(aes(x=Xenograft, fill=Xenograft)) +
geom_bar() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells from the xenografts treated with Copanlisib")+
scale_fill_lancet()
The figures are showing different quality metrices for the copanlisib samples.
# Extract identity and sample information from dataframe to determine the number of cells per sample_tag
n_cells_copanlisib<- table(subset_copanlisib$Sample_Tag)
# View table
kable(n_cells_copanlisib,
col.names = c("Sample_Tag",
"nCells"),
caption = "The table shows the number of cells which were treated with Copanlisib.")
| Sample_Tag | nCells |
|---|---|
| Copanlisib | 2591 |
#UMI count
subset_copanlisib %>%
ggplot(aes(color=Xenograft, x=nUMI, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
ylab("Cell density") +
geom_vline(xintercept = 500)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("nUMI from the xenografts treated with Copanlisib")+
scale_fill_lancet()
The figures are showing different quality metrices for the copanlisib samples.
#Gene Count
subset_copanlisib %>%
ggplot(aes(color=Xenograft, x=nGene, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
geom_vline(xintercept = 300)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("nGene from the xenografts treated with Copanlisib")+
scale_fill_lancet()
The figures are showing different quality metrices for the copanlisib samples.
#joint filtering effect
subset_copanlisib %>%
ggplot(aes(x=nUMI, y=nGene)) +
geom_point() +
scale_colour_gradient(low = "gray90", high = "black") +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 250) +
facet_wrap(~Sample_Tag)+
scale_fill_lancet()
## `geom_smooth()` using formula = 'y ~ x'
The figures are showing different quality metrices for the copanlisib samples.
#Mitochondrial count
subset_copanlisib %>%
ggplot(aes(color=Xenograft, x=percent.mt, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
geom_vline(xintercept = 2)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("Mitochondrial gene expression in [%] from \n the xenografts treated with Copanlisib")+
scale_fill_lancet()
The figures are showing different quality metrices for the copanlisib samples.
#cell count
subset_alpelisib %>%
ggplot(aes(x=Xenograft, fill=Xenograft)) +
geom_bar() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("NCells from the xenografts treated with Alpelisib")+
scale_fill_lancet()
The figures are showing different quality metrices for the alpelisib samples.
# Extract identity and sample information from dataframe to determine the number of cells per sample_tag
n_cells_alp<- table(subset_alpelisib$Sample_Tag)
# View table
kable(n_cells_alp,
col.names = c("Sample_Tag",
"nCells"),
caption = "The table shows the number of cells which were treated with Alpelisib.")
| Sample_Tag | nCells |
|---|---|
| Alpelisib | 17198 |
#UMI count
subset_alpelisib %>%
ggplot(aes(color=Xenograft, x=nUMI, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
ylab("Cell density") +
geom_vline(xintercept = 500)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("nUMI from the xenografts treated with Alpelisib")+
scale_fill_lancet()
The figures are showing different quality metrices for the alpelisib samples.
#Gene Count
subset_alpelisib %>%
ggplot(aes(color=Xenograft, x=nGene, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
geom_vline(xintercept = 300)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("nGene from the xenografts treated with Alpelisib")+
scale_fill_lancet()
The figures are showing different quality metrices for the alpelisib samples.
#joint filtering effect
subset_alpelisib %>%
ggplot(aes(x=nUMI, y=nGene)) +
geom_point() +
scale_colour_gradient(low = "gray90", high = "black") +
stat_smooth(method=lm) +
scale_x_log10() +
scale_y_log10() +
geom_vline(xintercept = 500) +
geom_hline(yintercept = 250) +
facet_wrap(~Sample_Tag)+
scale_fill_lancet()
## `geom_smooth()` using formula = 'y ~ x'
The figures are showing different quality metrices for the alpelisib samples.
#Mitochondrial count
subset_alpelisib %>%
ggplot(aes(color=Xenograft, x=percent.mt, fill=Xenograft)) +
geom_density(alpha = 0.2) +
scale_x_log10() +
geom_vline(xintercept = 2)+
theme(plot.title = element_text(hjust=0.5, face="bold")) +
ggtitle("Mitochondrial gene expression in [%] from \n the xenografts treated with Alpelisib")+
scale_fill_lancet()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 9 rows containing non-finite values (`stat_density()`).
The figures are showing different quality metrices for the alpelisib samples.
merged_filtered_seurat_meta %>%
ggplot(aes(x=Xenograft, fill=nUMI)) +
geom_bar() +
labs(x="Xenografts", y="Number of cells",
title="Overview of the number of cells by xenografts and condition") +
facet_grid(~Sample_Tag) +
theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) +
scale_fill_lancet()
## Warning: The following aesthetics were dropped during statistical transformation: fill
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the
## data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable
## into a factor?
## The following aesthetics were dropped during statistical transformation: fill
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the
## data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable
## into a factor?
## The following aesthetics were dropped during statistical transformation: fill
## ℹ This can happen when ggplot fails to infer the correct grouping structure in the
## data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical variable
## into a factor?
#Investigation of unwanted variations - cell cycle phases and mitochondrial gene expression
load("~/work/PI3K/data/SeuratObjects/filtered/Human/filtered_seurat.RData")
filtered_seurat$nCount_RNA <- NULL
filtered_seurat$nFeature_RNA <- NULL
filtered_seurat$orig.ident <- NULL
load("~/work/PI3K/data/SuplementaryFiles/SeuratObjects/cycle.rda")
filtered_phase_seurat <- CellCycleScoring(filtered_seurat,
g2m.features = g2m_genes,
s.features = s_genes)
## Warning: The following features are not present in the object: RAD51, not
## searching for symbol synonyms
filtered_phase_seurat <- FindVariableFeatures(filtered_phase_seurat,
selection.method = "vst",
nfeatures = 2000,
verbose = FALSE)
filtered_phase_seurat <- ScaleData(filtered_phase_seurat)
## Centering and scaling data matrix
filtered_phase_seurat <- RunPCA(filtered_phase_seurat)
## PC_ 1
## Positive: RETNLB, IL10, CPA3, SPRR3, AL355922.1, UPK1B, NKG7, IVL, HBQ1, TPSG1
## TPSD1, HBG1, OR6M1, ALAS2, KCCAT333, HBB, SPRR2A, NTS, WIF1, PCP4L1
## HBA2, LYPD2, ESM1, AGTR2, HBA1, OR51E1, CALCA, TNNC2, MT3, GPHA2
## Negative: RPS19, RPL11, RPLP1, RPS8, RPS12, RACK1, RPS13, RPS20, RPL37A, RPL27A
## RPS11, RPS24, TMSB10, NAP1L1, RPL15, RPS3, RPS6, RPL14, SEC61G, RPL5
## CALU, RPL13, ATP5F1E, LGALS1, RPS28, RPS16, RPL18, CD63, RPL35A, AC016739.1
## PC_ 2
## Positive: RPS15A, PERP, RPL3, RPL26P19, SNHG5, HMGN1, RPL6, RPL21, SNHG6, LRRC75A-AS1
## RPL9, C1orf43, RPL22, RPL7A, RPL13A, AC024293.1, GGCT, RPL7L1, AC008026.1, ZFAS1
## CSTB, RPL7P9, COX6C, DSG2, SRP9, ADI1, EPCAM, GAS5, TFAP2A, COMMD6
## Negative: SPARC, COL1A2, COL3A1, COL1A1, COL5A2, DCN, FBN1, SERPING1, PCOLCE, COL5A1
## FKBP7, PAM, AEBP1, CREB3L1, NFIX, BMP1, SERPINF1, CCDC80, OLFML2B, FSTL1
## PDGFRA, TGFBR3, IGFBP7, FGFR1, EBF1, TGFBR2, CYR61, TUBA1A, CDH11, C1R
## PC_ 3
## Positive: SAA1, CTSS, SAA3P, PFN1, F13A1, ARG1, SAA2, LYZ, BACH1-IT3, BEND3
## AC012618.1, AIF1, ACTG2, CTSC, ECM1, RPS15AP24, RPS26P52, RARS2, FABP5, PRDX1
## RPL35P4, CD72, AL162430.1, PF4V1, AL355802.1, C1QB, SEC61G, FABP5P7, MAF, C1QA
## Negative: MTND3P17, COL3A1, COL5A2, CTHRC1, LRRC75A-AS1, COL1A1, FNDC1, SNHG5, SDC2, COL1A2
## THBS2, PRAME, SPARC, ASPN, RPS15A, HOOK1, COL12A1, GPC3, RPL6, ITM2A
## SNHG6, LUM, PRRX1, FSTL1, PERP, EN1, GGCT, UCHL1, RPL26P19, GAS5
## PC_ 4
## Positive: COL12A1, LIMA1, SERPINH1, TIMP3, MTND3P17, THBS2, SPATS2L, CAST, KRT6A, CAMK2N1
## IFI44, SLC16A2, IFI27, BST2, COL1A1, KCNMA1, FSTL1, PLS3, TNFSF10, SEMA3C
## KLK5, TPM1, GBP1, SAMD9, FHL2, OAS2, OAS3, F3, S100A2, RAB31
## Negative: RPS12, RPL37A, RPS8, RPS24, HLA-DRB1, RPS11, RPS19, HLA-DPB1, PRAME, RPL37
## RPS13, RPL27A, HLA-DQB1, CD74, AC008481.1, RPL31, CXCR4, RPL11, RPS7, RPS18
## RACK1, RPL12, HLA-DRB5, HLA-DRA, HLA-DRB6, HLA-DPA1, UCHL1, HLA-DQA1, LRRC75A-AS1, RPS27A
## PC_ 5
## Positive: EFEMP1, TNXB, C1R, DCN, ACKR3, FCRL2, LGI2, OGN, C3, PLPP3
## ADAMTS5, COL14A1, CADM3, LBP, TGFBR2, NOVA1, GSN, CYGB, SLC4A4, TMEFF2
## HTRA3, THBS3, CXCL12, C1S, TGFBR3, AC011352.1, MT2P1, KLF4, LAMA2, MT2A
## Negative: COL12A1, NCAM1, CDH11, CALD1, COL8A1, PMEPA1, THBS2, TPM2, HS6ST2, ACTA2
## AC116917.1, CTGF, POSTN, TNC, GNG11, COL6A3, COL5A2, IGFBP7, LBH, SEMA7A
## CNN3, BMP1, THBS1, TPM1, OLFML2B, LOXL3, IGFBP3, COL5A1, TAGLN, ACTN1
DimPlot(filtered_phase_seurat,
reduction = "pca",
group.by= "Phase",
split.by = "Phase")+
ggtitle("PCA visualization of the cell cycle phases")+
scale_fill_lancet()
filtered_phase_seurat <- RunUMAP(filtered_phase_seurat,
dims = 1:40,
reduction = "pca")
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 16:01:51 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:01:51 Read 25592 rows and found 40 numeric columns
## 16:01:51 Using Annoy for neighbor search, n_neighbors = 30
## 16:01:51 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:01:56 Writing NN index file to temp file /tmp/RtmpircTMj/fileb65bd7b200354
## 16:01:56 Searching Annoy index using 1 thread, search_k = 3000
## 16:02:04 Annoy recall = 100%
## 16:02:05 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:02:07 Initializing from normalized Laplacian + noise (using irlba)
## 16:02:08 Commencing optimization for 200 epochs, with 1289790 positive edges
## 16:02:22 Optimization finished
DimPlot(filtered_phase_seurat,
reduction = "umap",
split.by = "Phase",
group.by = "Sample_Tag") +
ggtitle("UMAP splitted by cell cycle phase \n and grouped by conditions") +
scale_fill_lancet()
FeaturePlot(filtered_phase_seurat,
features = "percent.mt")+
ggtitle("Mitochondrial gene expression distribution in clusters")+
scale_fill_lancet()
#Integration
integrated_filtered_sct_cond_seurat <- readRDS("~/work/PI3K/data/SeuratObjects/integrated/Human/integrated_filtered_sct_cond_seurat.rds")
p1 <- DimPlot(integrated_filtered_sct_cond_seurat, reduction = "umap", group.by = "Sample_Tag") +
labs(title = "UMAP visualisation of the condition distribution") +
scale_fill_lancet()
p2 <- DimPlot(integrated_filtered_sct_cond_seurat, reduction = "umap", label = TRUE, repel = TRUE)+
labs(title = "UMAP visualisation of the number of clusters") +
scale_fill_lancet()
p3 <- DimPlot(integrated_filtered_sct_cond_seurat, reduction = "umap", group.by = "Xenograft")+
labs(title = "UMAP visualisation of the xenograft distribution") +
scale_fill_lancet()
p1
p2
p3
#Clustering analysis
seurat_integrated <- readRDS("~/work/PI3K/data/SeuratObjects/clusteranalysis/Human/seurat_integrated_newidents.rds")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.1"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6) +
labs(title = "UMAP visualisation shows the number of clusters \n for resolution 1.0") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.0.2"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 0.2") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.0.4"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 0.4") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.0.6"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 0.6") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.0.8"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 0.8") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.1.4"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 1.4") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.1.6"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 1.6") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.1.8"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 1.8") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.2"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 2.0") +
scale_fill_lancet(name="ClusterIDs")
# Assign identity of clusters
Idents(object = seurat_integrated) <- "integrated_snn_res.2.4"
# Plot the UMAP
DimPlot(seurat_integrated,
reduction = "umap",
label = TRUE,
label.size = 6)+
labs(title = "UMAP visualisation shows the number of clusters \n for the resolution 2.4") +
scale_fill_lancet(name="ClusterIDs")
#Celltype identification singleR
seurat_integrated_renameidents <- readRDS("~/work/PI3K/data/SeuratObjects/celltypeIdentification/Human/SingleR/seurat_integrated_singleR.rds")
DimPlot(object = seurat_integrated_renameidents,
reduction = "umap",
label = TRUE,
label.size = 3,
repel = TRUE) +
labs(title = "UMAP visualisation shows the different celltypes \n for the resolution 0.4") +
scale_fill_lancet(name="Celltypes")
n_cells0.4.long <- readRDS("~/work/PI3K/data/SeuratObjects/celltypeIdentification/Human/SingleR/n_cells0.4.rds")
plt_0.4 <- ggplot(n_cells0.4.long,aes(ClusterID, singlr_labels)) +
geom_tile(aes(fill=Occurances)) +
geom_text(aes(label = round(Occurances, 1)), size=2) +
scale_fill_gradient(low = "white", high = "red") +
labs(y="Celltypes")
plt_0.4
#Label comparison
df_merged <- readRDS("~/work/PI3K/data/SeuratObjects/celltypeIdentification/df_merged_singleR_labels.rds")
plt_merged <- ggplot(df_merged,aes(x = singlr_labels.human, y= singlr_labels.mouse)) +
geom_tile(aes(fill=Cell_Index_count)) +
geom_text(aes(label = Cell_Index_count), size=1.5) +
scale_fill_gradient(low = "white", high = "red") +
labs(x="Celltypes_human", y="Celltypes_mouse") +
theme(axis.text.x = element_text(angle=90))
plt_merged
#Celltype identification puram
seurat_integrated_renamedIdents <- readRDS("~/work/PI3K/data/SeuratObjects/celltypeIdentification/Human/Puram/seurat_integrated_puram.rds")
DimPlot(object = seurat_integrated_renamedIdents,
reduction = "umap",
label = TRUE,
label.size = 3,
repel = TRUE) +
labs(title = "UMAP visualisation shows the different celltypes \n for the resolution 0.4") +
scale_fill_lancet(name="Celltypes")
n_cells0.4.long <- readRDS("~/work/PI3K/data/SeuratObjects/celltypeIdentification/Human/Puram/n_cells0.4.rds")
plt_0.4 <- ggplot(n_cells0.4.long,aes(ClusterID, predicted.id)) +
geom_tile(aes(fill=Occurances)) +
geom_text(aes(label = round(Occurances, 1)), size=2) +
scale_fill_gradient(low = "white", high = "red") +
labs(y="Celltypes")
plt_0.4
#Transcript analysis
umi_counts_merged <- read_delim("~/work/PI3K/data/SuplementaryFiles/CSVFiles/umi_count_merged.csv")
## Rows: 34609 Columns: 4
## ── Column specification ───────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Cell_Index, Labels
## dbl (2): UMIs_human, UMIs_mouse
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#create scatter plot
sp <- ggplot(umi_counts_merged, aes(x = UMIs_human, y = UMIs_mouse, fill=Labels)) + geom_point(aes(colour = Labels))
sp + geom_abline() +
xlab("Transcripts human") +
ylab("Transcripts mouse")
#Celltype identification demultiplexed
seurat_integrated <- readRDS("~/work/PI3K/data/SeuratObjects/cellfiltering_umi/Human/seurat_integrated_with_spec_labels.rds")
seurat_integrated_filtered_human <- readRDS("~/work/PI3K/data/SeuratObjects/demultiplexed/Human/seurat_integrated_filtered_human.rds")
UMAP_human <- DimPlot(seurat_integrated, reduction = "umap", group.by= "Labels", cols = c("red","lightgrey","purple")) + ggtitle("UMAP human cells")
UMAP_human
seurat_integrated_filtered_human <- RunUMAP(seurat_integrated_filtered_human, dims = 1:30)
## 16:04:31 UMAP embedding parameters a = 0.9922 b = 1.112
## 16:04:31 Read 25592 rows and found 30 numeric columns
## 16:04:31 Using Annoy for neighbor search, n_neighbors = 30
## 16:04:31 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 16:04:33 Writing NN index file to temp file /tmp/RtmpircTMj/fileb65bd6ac50948
## 16:04:33 Searching Annoy index using 1 thread, search_k = 3000
## 16:04:40 Annoy recall = 100%
## 16:04:41 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 16:04:43 Initializing from normalized Laplacian + noise (using irlba)
## 16:04:44 Commencing optimization for 200 epochs, with 1140710 positive edges
## 16:04:57 Optimization finished
DimPlot(seurat_integrated_filtered_human, reduction = "umap", group.by = "Sample_Tag") + ggtitle("UMAP human cells by conditions")
metadata <- seurat_integrated@meta.data
metadata %>% select(2,26) -> metadata
table(metadata)
## Labels
## Xenograft mainly_human mainly_mouse unique_human
## HN10621 76 3560 628
## HN10960 0 160 32
## HN11097_Control 4 1318 172
## HN15239A_Alpelisib 106 7351 1815
## HN15239A_Control 29 10127 214
prop.table(table(metadata))
## Labels
## Xenograft mainly_human mainly_mouse unique_human
## HN10621 0.0029696780 0.1391059706 0.0245389184
## HN10960 0.0000000000 0.0062519537 0.0012503907
## HN11097_Control 0.0001562988 0.0515004689 0.0067208503
## HN15239A_Alpelisib 0.0041419193 0.2872381994 0.0709206002
## HN15239A_Control 0.0011331666 0.3957095967 0.0083619881
seurat_integrated_renamedIdents <- readRDS("~/work/PI3K/data/SeuratObjects/celltypeIdentification/Human/Puram/seurat_integrated_puram.rds")
DimPlot(object = seurat_integrated_renamedIdents,
reduction = "umap",
label = TRUE,
label.size = 3,
repel = TRUE) +
labs(title = "UMAP visualisation shows the different celltypes \n for the resolution 0.4") +
scale_fill_lancet(name="Celltypes")
n_cells0.4.long <- readRDS("~/work/PI3K/data/SeuratObjects/celltypeIdentification/Human/Puram/n_cells0.4.rds")
plt_0.4 <- ggplot(n_cells0.4.long,aes(ClusterID, predicted.id)) +
geom_tile(aes(fill=Occurances)) +
geom_text(aes(label = round(Occurances, 1)), size=2) +
scale_fill_gradient(low = "white", high = "red") +
labs(y="Celltypes")
plt_0.4